** Further collapse similarity measures, merge with store characteristics 
** JHL  

** Similarity measures 
** For each pair of stores i and j in chain c, average across products and quarters 
** 1. Weekly correlation in deviation from quarterly mean

*************************************
** Set up workspace
*************************************
** Pin the interpreter to Stata 15.0 behavior so later releases run this file the same way.
version 15.0
** Start from a clean session and never pause output on -more-.
clear all
set more off
** NOTE(review): the include below is commented out, so the path globals used in
** this file (path_home, path_dta, path_big_dta, path_log) are not defined here.
** Presumably a calling do-file or profile sets them; confirm before running standalone.
** include "/scratch/midway2/jleung/replication/do/set_path.do"
cd "${path_home}"
** Add ../programs (relative to the directory set by -cd- above) to the end of the ado search path.
adopath + ../programs

** Logging is currently disabled.
** log using "${path_log}/06_similarity_top1", text replace

*************************************
** Start work here
*************************************
** Start timer 1; it is stopped and listed at the end of the file.
timer on 1 

*************************************
** [1] Collapse similarity measures
*************************************

** [1.3] Differences in correlations across states within parent 

** Begin with an empty dataset that holds only the three identifier columns.
** After every append, rows whose identifiers are still blank are stamped with
** the channel, module and geo level of the file that was just read.
clear
generate channel_code = ""
generate geo = ""
generate module = .

local channels D F M
local modules 1040 1290 1303 1362 1463 1484 1493 3603 7080 7260 7734 8404

** Pass 1: parent-level correlation files, labelled geo = "all".
** Note: module 7734 in channel M has only one parent, so that file was not created.
** -capture noisily- displays the append error and lets the loop continue.
foreach cc of local channels {
	foreach m of local modules {
		capture noisily append using "${path_dta}/nielsen/`m'_top1_`cc'_cor_parent"
		replace channel_code = "`cc'" if channel_code == ""
		replace module = `m' if module == .
		replace geo = "all" if geo == ""
	}
}

** Pass 2: parent-by-geography correlation files, labelled with the geography name.
** Only "state" is used at present; extend the list to add further geographies.
local geos state
foreach g of local geos {
	foreach cc of local channels {
		foreach m of local modules {
			capture noisily append using "${path_dta}/nielsen/`m'_top1_`cc'_cor_parent_x_`g'_by_parent"
			replace channel_code = "`cc'" if channel_code == ""
			replace module = `m' if module == .
			replace geo = "`g'" if geo == ""
		}
	}
}

sort channel_code geo module parent_code_1

** Percentage-difference measures: the gap between two correlations divided by their sum.
** NOTE(review): the meaning of correlation1-correlation4 is defined upstream, not in this file.
generate pct_d_cor_wi_parent = (correlation4 - correlation3) / (correlation4 + correlation3)
generate pct_d_cor_wi_geo = (correlation4 - correlation2) / (correlation4 + correlation2)
generate pct_d_cor_wi_all = (correlation2 - correlation1) / (correlation1 + correlation2)

** Summarize the measures somehow in a systematic way
** list module pct_d_cor_wi_parent geo correlation2 correlation3 correlation4 count4 store_code_uc_14 if parent_code_1==5851&channel_code=="M"

** egen m_d_cor_wi_parent = mean(d_cor_wi_parent), by(parent_code_1 channel_code)

** Histogram, use continuous measures or classify into rigid and non-rigid chains

** Drop outliers / use median (avoid small sample size measures)
** For every measure, attach its mean within parent x channel x geo as m_<measure>.
local measures d_cor d_cor_wi_parent d_cor_wi_state count4 store_code_uc_14 ///
	pct_d_cor_wi_parent pct_d_cor_wi_geo pct_d_cor_wi_all
foreach x of varlist `measures' {
	egen m_`x' = mean(`x'), by(parent_code_1 channel_code geo)
}

** Collapse to one row per parent x channel x geo, taking the median of each raw
** measure. The m_ (group mean) variables are passed through the same statistic.
collapse (median) ///
	d_cor d_cor_wi_parent d_cor_wi_state count4 store_code_uc_14 ///
	pct_d_cor_wi_parent pct_d_cor_wi_geo pct_d_cor_wi_all ///
	m_d_cor m_d_cor_wi_parent m_d_cor_wi_state m_count4 m_store_code_uc_14 ///
	m_pct_d_cor_wi_parent m_pct_d_cor_wi_geo m_pct_d_cor_wi_all, ///
	by(parent_code_1 channel_code geo)

sort channel_code parent_code_1 geo

** Only the state-level rows are carried forward.
keep if geo == "state"

** Rough cutoff at 0.05, results should be robust to this cutoff, alternatively focus on triple diff which uses it as a continuous measure
** Mean, median, and percentage difference in both
** Stata treats a missing value as larger than any number, so a bare
** "x <= cutoff" evaluates to 0 when x is missing and would silently classify
** chains with no measure as non-rigid. The -if !missing()- qualifier leaves
** the flag missing in that case instead.
gen rigid1 = m_d_cor_wi_parent <= 0.05 if !missing(m_d_cor_wi_parent)
gen rigid2 = d_cor_wi_parent <= 0.05 if !missing(d_cor_wi_parent)
gen rigid3 = m_pct_d_cor_wi_parent <= 0.05 if !missing(m_pct_d_cor_wi_parent)
gen rigid4 = pct_d_cor_wi_parent <= 0.05 if !missing(pct_d_cor_wi_parent)
gen rigid5 = m_pct_d_cor_wi_parent <= 0.03 if !missing(m_pct_d_cor_wi_parent)
gen rigid6 = pct_d_cor_wi_parent <= 0.03 if !missing(pct_d_cor_wi_parent)

** Drop rows where the collapsed store_code_uc_14 is missing.
drop if store_code_uc_14 == .

save "${path_dta}/nielsen/top1_cor_parent_x_state_by_parent", replace 


*************************************
** [2] Retail chain characteristics
*************************************

** [2.1] First check number of states each chain is in 

** Load the 2006Q1 slice of the store panel and attach store attributes by store and year.
** Unmatched master rows are kept; rows that exist only in the using file are discarded.
use "${path_big_dta}/PI/PIq_stores_0615" if yq == yq(2006, 1), clear

merge m:1 store_code_uc year using "${path_big_dta}/nielsen/stores_0615", keep(master match) nogenerate

** Per chain (parent_code) and year: number of distinct states and number of distinct stores.

** Flag one row per distinct parent x state x year, then add the flags up within parent x year.
egen ps_tag = tag(parent_code fips_state_code year)
egen pstate = total(ps_tag), by(parent_code year)

** Same idea for stores: flag one row per store x year, then add up within parent x year.
egen store_tag = tag(store_code_uc year)
egen pstore = total(store_tag), by(parent_code year)

** Equivalent summary keyed on retailer_code instead of parent_code.
** Deliberately switched off: the condition below is never true.
if 0 == 1 {
	egen rs_tag = tag(retailer_code fips_state_code)
	egen rstate = total(rs_tag), by(retailer_code)

	egen rstore = total(store_tag), by(retailer_code)

	tabulate retailer_code, summarize(rstate)
	tabulate retailer_code, summarize(rstore)
}

** One flagged row per parent, so the summary below counts every chain once.
egen p_tag = tag(parent_code)
summarize pstate pstore if p_tag == 1, detail

** Combined county identifier: state code times 1000 plus county code.
generate county = fips_state_code * 1000 + fips_county_code

save "${path_dta}/nielsen/stores_2006_chain_chars", replace

*************************************
** Close workspace
*************************************
** Stop timer 1 (started before section [1]) and print its elapsed time.
timer off 1
timer list 1
** Logging is disabled above, so there is no log to close.
** log close
